Code
import eia_api as api
import eia_etl as etl
import eia_data
import pandas as pd
import numpy as np
import requests
import json
import os
import datetime
import plotly.express as pxThe goal of this doc is to execute an initial data pull of the hourly demand for California balancing authority subregion (CISO). This includes the following four independent system operators:
The data backfill process includes the following steps:
import eia_api as api
import eia_etl as etl
import eia_data
import pandas as pd
import numpy as np
import requests
import json
import os
import datetime
import plotly.express as pxraw_json = open("../metadata/series.json")
meta_json = json.load(raw_json)
series = pd.DataFrame(meta_json["series"])
api_path = meta_json["api_path"]facets_template = {
"parent" : None,
"subba" : None
}
start = datetime.datetime(2018, 6, 20, 1)
end = datetime.datetime(2024, 2, 18, 1)
offset = 2250
eia_api_key = os.getenv('EIA_API_KEY')
meta_path = "../metadata/ciso_log_py.csv"
data_path = "../csv/ciso_grid_py.csv"metadata = api.eia_metadata(api_key = eia_api_key, api_path = api_path)
metadata.meta.keys()
print(metadata.meta["startPeriod"])
print(metadata.meta["endPeriod"])2018-06-19T05
2024-02-22T08
start = datetime.datetime(2018, 7, 1, 8)
# start = datetime.datetime(2024, 1, 1, 1)
end = datetime.datetime(2024, 2, 18, 1)
for i in series.index:
facets = facets_template
facets["parent"] = series.at[i, "parent_id"]
facets["subba"] = series.at[i, "subba_id"]
print(facets)
temp = api.eia_backfile(api_key = eia_api_key,
api_path = api_path+ "data",
facets = facets,
start = start,
end = end,
offset = offset)
ts_obj = pd.DataFrame(np.arange(start = start, stop = end + datetime.timedelta(hours = 1), step = datetime.timedelta(hours = 1)).astype(datetime.datetime), columns=["index"])
ts_obj = ts_obj.merge(temp.data, left_on = "index", right_on = "period", how="left")
ts_obj.drop("period", axis = 1, inplace= True)
ts_obj = ts_obj.rename(columns= {"index": "period"})
meta_temp = eia_data.create_metadata(data = ts_obj, start = start, end = end, type = "backfile")
meta_temp["index"] = 1
meta_df = pd.DataFrame([meta_temp])
if i == series.index.start:
data = ts_obj
meta = meta_df
else:
data = data._append(ts_obj)
meta = meta._append(meta_df){'parent': 'CISO', 'subba': 'PGAE'}
{'parent': 'CISO', 'subba': 'SCE'}
{'parent': 'CISO', 'subba': 'SDGE'}
{'parent': 'CISO', 'subba': 'VEA'}
print(meta)
# The initial pull has some missing values
data.head() index parent subba time start \
0 1 CISO PGAE 2024-02-23 03:32:21.312337+00:00 2018-07-01 08:00:00
0 1 CISO SCE 2024-02-23 03:32:36.118749+00:00 2018-07-01 08:00:00
0 1 CISO SDGE 2024-02-23 03:32:51.011954+00:00 2018-07-01 08:00:00
0 1 CISO VEA 2024-02-23 03:33:06.089176+00:00 2018-07-01 08:00:00
end start_act end_act start_match \
0 2024-02-18 01:00:00 2018-07-01 08:00:00 2024-02-18 01:00:00 True
0 2024-02-18 01:00:00 2018-07-01 08:00:00 2024-02-18 01:00:00 True
0 2024-02-18 01:00:00 2018-07-01 08:00:00 2024-02-18 01:00:00 True
0 2024-02-18 01:00:00 2018-07-01 08:00:00 2024-02-18 01:00:00 True
end_match n_obs na type update success \
0 True 49386 98 backfile False False
0 True 49386 98 backfile False False
0 True 49386 98 backfile False False
0 True 49386 98 backfile False False
comments
0 Missing values were found;
0 Missing values were found;
0 Missing values were found;
0 Missing values were found;
| period | subba | subba-name | parent | parent-name | value | value-units | |
|---|---|---|---|---|---|---|---|
| 0 | 2018-07-01 08:00:00 | PGAE | Pacific Gas and Electric | CISO | California Independent System Operator | 12522.0 | megawatthours |
| 1 | 2018-07-01 09:00:00 | PGAE | Pacific Gas and Electric | CISO | California Independent System Operator | 11745.0 | megawatthours |
| 2 | 2018-07-01 10:00:00 | PGAE | Pacific Gas and Electric | CISO | California Independent System Operator | 11200.0 | megawatthours |
| 3 | 2018-07-01 11:00:00 | PGAE | Pacific Gas and Electric | CISO | California Independent System Operator | 10822.0 | megawatthours |
| 4 | 2018-07-01 12:00:00 | PGAE | Pacific Gas and Electric | CISO | California Independent System Operator | 10644.0 | megawatthours |
# Save the data
meta["success"] = True
d = eia_data.append_data(data_path = data_path, new_data = data, init = True, save = True)
# Save the metadata
meta["update"] = True
m = eia_data.append_metadata(meta_path = "../metadata/ciso_log_py.csv", meta = meta, save = True, init = True)
print(m)Initial data pull
Save the data to CSV file
index parent subba time start \
0 1 CISO PGAE 2024-02-23 03:32:21.312337+00:00 2018-07-01 08:00:00
0 1 CISO SCE 2024-02-23 03:32:36.118749+00:00 2018-07-01 08:00:00
0 1 CISO SDGE 2024-02-23 03:32:51.011954+00:00 2018-07-01 08:00:00
0 1 CISO VEA 2024-02-23 03:33:06.089176+00:00 2018-07-01 08:00:00
end start_act end_act start_match \
0 2024-02-18 01:00:00 2018-07-01 08:00:00 2024-02-18 01:00:00 True
0 2024-02-18 01:00:00 2018-07-01 08:00:00 2024-02-18 01:00:00 True
0 2024-02-18 01:00:00 2018-07-01 08:00:00 2024-02-18 01:00:00 True
0 2024-02-18 01:00:00 2018-07-01 08:00:00 2024-02-18 01:00:00 True
end_match n_obs na type update success \
0 True 49386 98 backfile True True
0 True 49386 98 backfile True True
0 True 49386 98 backfile True True
0 True 49386 98 backfile True True
comments
0 Missing values were found;
0 Missing values were found;
0 Missing values were found;
0 Missing values were found;
We will use Plotly to visualize the series:
d = data.sort_values(by = ["subba", "period"])
p = px.line(data, x="period", y="value", color="subba")
p.show()